############################################################
# "Are US UI Benefits Popularly Demanded?"
# Micro-Level Political Participation Regressions and Results
# Alexander Hertel-Fernandez and Konstantin Kashin
# Harvard University Department of Government
############################################################


## This file replicates the regressions of political participation (specifically voting and reporting interest in politics) on economic risk exposure and various socioeconomic and demographic characteristics.

## The data for this analysis comes from a combination of cross-national survey data from the International Social Survey Program's Role of Government Module (ISSP) for 2006 and occupational unemployment rates calculated by Philipp Rehm of Ohio State University from various national labor force surveys (to measure economic risk exposure). Because the occupational unemployment rates are not yet publicly available, we do not post the data for the micro section. However, interested replicators can recompile the datset from the ISSP and request the data from Philipp Rehm directly. His email address is rehm.16@osu.edu and website is: http://polisci.osu.edu/faculty/rehm/index.htm.

## ISSP data can be downloaded from here: http://zacat.gesis.org/webview/index.jsp?object=http://zacat.gesis.org/obj/fStudy/ZA4700.


##Load the required libraries
library(xtable)
library(foreign)
library(mnormt)
library(Zelig)
library(survey)
library(foreach)
library(doBy)
library(estout)
library(cwhmisc)
library(lattice)
library(apsrtable)
library(cem)
library(MatchIt)
library(Amelia)
#source("outreg.txt") # source outreg function for making regression tables #
setwd("~/Dropbox/Rehm Replication/Data/Rehm Replication/micro_analyses")

############################################################

############################################################
## Load and process ISSP 2006 "Role of Government" Data
############################################################

rehm.polvar <- read.dta("issp2006_nolabel.dta")

## Create country dummies based on ccode variable

table(rehm.polvar$ccode)
ccode.f <-(rehm.polvar$ccode)

#year.f <- factor(rehm.polvar$yr_field)
#table(year.f)

## Load variables

pro_urr1 <- rehm.polvar$pro_urr1
dpro_urr1 <- rehm.polvar$dpro_urr1
#These are the preferences for unemployment insurance spending

ur <- rehm.polvar$ur0104
#Occupational unemployment rate

income <- rehm.polvar$income
degree <- rehm.polvar$degree
degree <- as.numeric(degree)
female <- rehm.polvar$female
age <- rehm.polvar$age
age <- as.numeric(age)

unempl <- rehm.polvar$unempl
#Dummy for unemployment

empl_full <- rehm.polvar$empl_full
#Dummy for fully employed

empl_none <- rehm.polvar$empl_none
#Dummy for not in labor force

student <- rehm.polvar$student
retired <- rehm.polvar$retired
church <- rehm.polvar$church

skillspec <- rehm.polvar$skillspec
#Skill specificity

leftright <- rehm.polvar$leftright
#Left-right ideology

weight <- rehm.polvar$weight
#Survey weight

isco88_1d <- rehm.polvar$isco88_1d

ccode <- rehm.polvar$ccode
#Country code

yr_field <- rehm.polvar$yr_field
#Year of fieldwork

vote <- rehm.polvar$vote
#Voted in last major national election

interest <- rehm.polvar$interest
#Expressed interest in politics

sayingov <- rehm.polvar$sayingov
#Believes that they have a say in government

dinterest <- rehm.polvar$dinterest
#Binary interest variable

dsayingov <- rehm.polvar$dsayingov
#Binary for say in government

davgcitinfluence <- rehm.polvar$davgcitinfluence
#Binary for agreeing to the average citizen has influence on government

pol.data <- cbind(ur, income, degree, female, age, unempl, empl_full, empl_none, student, retired, church, skillspec, leftright, vote, dinterest, weight, ccode.f, dnuminfluence, davgcitinfluence)

uipref.data <- cbind(ur, income, degree, female, age, unempl, empl_full, empl_none, student, retired, church, skillspec, leftright, vote, dpro_urr1, weight, ccode.f)

#Subset data to just the USA

pol.data <- data.frame(pol.data)
pol.data.usa <- subset(pol.data, ccode==2)

uipref.data <- data.frame(uipref.data)
uipref.data.usa <- subset(uipref.data, ccode==2)

############################################################
##Descriptive Statistics for US Political Data
############################################################

ur.sum <- summary(pol.data.usa.mi$ur)
ur.sd <- sd(pol.data.usa.mi$ur)
income.sum <- summary(pol.data.usa.mi$income)
income.sd <- sd(pol.data.usa.mi$income)
degree.sum <- summary(pol.data.usa.mi$degree)
degree.sd <- sd(pol.data.usa.mi$degree)
female.sum <- summary(pol.data.usa.mi$female)
female.sd <- sd(pol.data.usa.mi$female)
age.sum <- summary(pol.data.usa.mi$age)
age.sd <- sd(pol.data.usa.mi$age)
unempl.sum <- summary(pol.data.usa.mi$unempl)
unempl.sd <- sd(pol.data.usa.mi$unempl)
emplfull.sum <- summary(pol.data.usa.mi$empl_full)
emplfull.sd <- sd(pol.data.usa.mi$empl_full)
emplnone.sum <- summary(pol.data.usa.mi$empl_none)
emplnone.sd <- sd(pol.data.usa.mi$empl_none)
student.sum <- summary(pol.data.usa.mi$student)
student.sd <- sd(pol.data.usa.mi$student)
retired.sum <- summary(pol.data.usa.mi$retired)
retired.sd <- sd(pol.data.usa.mi$retired)
church.sum <- summary(pol.data.usa.mi$church)
church.sd <- sd(pol.data.usa.mi$church)
skillspec.sum <- summary(pol.data.usa.mi$skillspec)
skillspec.sd <- sd(pol.data.usa.mi$skillspec)
lr.sum <- summary(pol.data.usa.mi$leftright)
lr.sd <- sd(pol.data.usa.mi$leftright)
vote.sum <- summary(pol.data.usa.mi$vote)
vote.sd <- sd(pol.data.usa.mi$vote)
dint.sum <- summary(pol.data.usa.mi$dinterest)
dint.sd <- sd(pol.data.usa.mi$dinterest)

des.sum <- rbind(ur.sum, income.sum,degree.sum,female.sum,age.sum,unempl.sum,emplfull.sum,emplnone.sum,student.sum,retired.sum,church.sum,skillspec.sum,lr.sum,vote.sum,dint.sum)

############################################################
##Apply multiple imputation to data usng Amelia
############################################################

#Impute missing data for both USA and international datasets

#Set the seed
set.seed(02138)
pol.data.bounds <- matrix(c(1,0,25,5,15,98,12,0.8819,20.45), nrow = 3, ncol = 3, byrow=T)

pol.data.amelia.usa <- amelia(pol.data.usa, m=10, cs="ccode.f", idvars=c("weight"), noms=c("female","unempl","empl_full","empl_none","student","retired","vote","dinterest","dnuminfluence","davgcitinfluence"), bounds=pol.data.bounds)

pol.data.amelia.int <- amelia(pol.data, m=10, cs="ccode.f", idvars=c("weight"), noms=c("female","unempl","empl_full","empl_none","student","retired","vote","dinterest","dnuminfluence","davgcitinfluence"), bounds=pol.data.bounds)

uipref.data.amelia.usa <- amelia(uipref.data.usa, m=10, cs="ccode.f", idvars=c("weight"), noms=c("female","unempl","empl_full","empl_none","student","retired","vote","dpro_urr1"), bounds=pol.data.bounds)

uipref.data.amelia.int <- amelia(uipref.data, m=10, cs="ccode.f", idvars=c("weight"), noms=c("female","unempl","empl_full","empl_none","student","retired","vote","dpro_urr1"), bounds=pol.data.bounds)

#Check the imputation results

plot(pol.data.amelia.usa, which.vars=1:10)
plot(pol.data.amelia.int, which.vars=1:10)
plot(uipref.data.amelia.usa, which.vars=1:10)
plot(uipref.data.amelia.usa, which.vars=1:10)

compare.density(pol.data.amelia.usa)

overimpute(pol.data.amelia.usa, var = 3)

missmap(pol.data.amelia.usa)
missmap(pol.data.amelia.int)
missmap(uipref.data.amelia.usa)
missmap(uipref.data.amelia.usa)

disperse(pol.data.amelia.usa)
disperse(pol.data.amelia.int)
disperse(uipref.data.amelia.usa)
disperse(uipref.data.amelia.usa)

############################################################
## Run regressions for political participation
############################################################

##Run regressions and simulations for USA-subsetted variables

pol.data.usa.mi <- data.frame(pol.data.amelia.usa$imputations)
pol.data.int.mi <- data.frame(pol.data.amelia.int$imputations)

surmodel.usa <- list(mu1 = dinterest ~ ur + income + female + age + unempl + empl_full + empl_none + student + retired + church + skillspec + leftright, mu2 = vote ~ ur + income + female + age + unempl + empl_full + empl_none + student + retired + church + skillspec + leftright)

tab.vote.interest.usa <- zelig(surmodel.usa, data=data.frame(pol.data.usa.mi), model="bprobit")
summary(tab.vote.interest.usa)@coef3
xtable(summary(tab.vote.interest.usa)@coef3)

ur.val <- seq(from=0, to=22.4, by=.1) 
inc.val <- seq(from=0, to=9, by=.1)
ur.pp.usa <- data.frame()
inc.pp.usa <- data.frame() 

for(i in ur.val){
	counter <- i
	temp.ur.usa <- setx(tab.vote.interest.usa, ur=counter)
	temp.sim.usa.ur <- sim(tab.vote.interest.usa, x=temp.ur.usa)
	ur.pp.usa <- rbind(ur.pp.usa, c(mean(temp.sim.usa.ur$qi$ev[,4]), quantile(temp.sim.usa.ur$qi$ev[,4],.025),quantile(temp.sim.usa.ur$qi$ev[,4],.975)))
	}
colnames(ur.pp.usa) <- c("Predicted Probability","Lower 95% CI","Upper 95% CI")

for(i in inc.val){
	counter <- i
	temp.inc.usa <- setx(tab.vote.interest.usa, income=counter)
	temp.sim.usa.inc <- sim(tab.vote.interest.usa, x=temp.inc.usa)
	inc.pp.usa <- rbind(inc.pp.usa, c(mean(temp.sim.usa.inc$qi$ev[,4]), quantile(temp.sim.usa.inc$qi$ev[,4],.025),quantile(temp.sim.usa.inc$qi$ev[,4],.975)))
	}
colnames(inc.pp.usa) <- c("Predicted Probability","Lower 95% CI","Upper 95% CI")

pdf(file="occur_pp_usa.pdf")
plot(ur.val, ur.pp.usa[,1], ylim=c(0,1), xlim=c(0,25), type="l", lty="solid", col="black", ylab="Probability of Having Voted in the Last Election and Being Interested in Politics", xlab="Occupational Unemployment Rate")
points(ur.val, ur.pp.usa[,2], xlim=c(0,25), col="black", lty="dotted", type="l")
points(ur.val, ur.pp.usa[,3], xlim=c(0,25), col="black", lty="dotted", type="l")
dev.off()	

pdf(file="income_pp_usa.pdf")
plot(inc.val, inc.pp.usa[,1], ylim=c(0,1), xlim=c(0,9), type="l", lty="solid", col="black", ylab="Probability of Having Voted in the Last Election and Being Interested in Politics", xlab="Family Income in Nine Quantiles")
points(inc.val, inc.pp.usa[,2], xlim=c(0,9), col="black", lty="dotted", type="l")
points(inc.val, inc.pp.usa[,3], xlim=c(0,9), col="black", lty="dotted", type="l")
dev.off()	

##Run regressions and simulations for cross-national variables

tab.vote.interest.int <- zelig(surmodel.usa, data=data.frame(pol.data.int.mi), model="bprobit")
summary(tab.vote.interest.int)

############################################################
## Run regressions for UI preferences
############################################################

##Run regressions and simulations for USA-subsetted variables

uipref.data.usa.mi <- data.frame(uipref.data.amelia.usa$imputations)

tab.uipref.usa <- zelig(dpro_urr1 ~ ur + income + female + age + unempl + empl_full + empl_none + student + retired + church + skillspec + leftright, model="probit", data=uipref.data.usa.mi)

summary(tab.uipref.usa)
xtable(summary(tab.uipref.usa))

ur.uipref.usa <- data.frame()
inc.uipref.usa <- data.frame() 

for(i in ur.val){
	counter <- i
	temp.ur.usa <- setx(tab.uipref.usa, ur=counter)
	temp.sim.usa.ur <- sim(tab.uipref.usa, x=temp.ur.usa)
	ur.uipref.usa <- rbind(ur.uipref.usa, c(mean(temp.sim.usa.ur$qi$ev[,1]), quantile(temp.sim.usa.ur$qi$ev[,1],.025),quantile(temp.sim.usa.ur$qi$ev[,1],.975)))
	}
colnames(ur.uipref.usa) <- c("Predicted Probability","Lower 95% CI","Upper 95% CI")

for(i in inc.val){
	counter <- i
	temp.inc.usa <- setx(tab.uipref.usa, income=counter)
	temp.sim.usa.inc <- sim(tab.uipref.usa, x=temp.inc.usa)
	inc.uipref.usa <- rbind(inc.uipref.usa, c(mean(temp.sim.usa.inc$qi$ev[,1]), quantile(temp.sim.usa.inc$qi$ev[,1],.025),quantile(temp.sim.usa.inc$qi$ev[,1],.975)))
	}
colnames(inc.uipref.usa) <- c("Predicted Probability","Lower 95% CI","Upper 95% CI")

pdf(file="occur_uipref_usa.pdf")
plot(ur.val, ur.uipref.usa[,1], ylim=c(0,1), xlim=c(0,25), type="l", lty="solid", col="black", ylab="Probability of Favoring Higher UI Spending", xlab="Occupational Unemployment Rate")
points(ur.val, ur.uipref.usa[,2], xlim=c(0,25), col="black", lty="dotted", type="l")
points(ur.val, ur.uipref.usa[,3], xlim=c(0,25), col="black", lty="dotted", type="l")
dev.off()	

pdf(file="income_uipref_usa.pdf")
plot(inc.val, inc.uipref.usa[,1], ylim=c(0,1), xlim=c(0,9), type="l", lty="solid", col="black", ylab="Probability of Favoring Higher UI Spending", xlab="Family Income in Nine Quantiles")
points(inc.val, inc.uipref.usa[,2], xlim=c(0,9), col="black", lty="dotted", type="l")
points(inc.val, inc.uipref.usa[,3], xlim=c(0,9), col="black", lty="dotted", type="l")
dev.off()	

############################################################
## Run ROC Curve Check on USA SUR Regressions
############################################################

#Run for Interest in Politics

head(tab.vote.interest.usa@fitted.values)

fitted.dint <- tab.vote.interest.usa@fitted.values[,3] + tab.vote.interest.usa@fitted.values[,4]
head(fitted.dint)

y.dint <- tab.vote.interest.usa@y[,3] + tab.vote.interest.usa@y[,4]
head(y.dint)

cutoff <- c(seq(0, .1, by=.0001), seq(.1, 1, by=.005))
correct0s <- c()
correct1s <- c()
for (i in 1:length(cutoff)) {
  predictions <- as.numeric(fitted.dint > cutoff[i])
  correct0s[i] <- mean(predictions[y.dint==0]==0)
  correct1s[i] <- mean(predictions[y.dint==1]==1)
}
pdf("roc_dint.pdf")
plot(correct1s, correct0s, type="l", xlim=c(0,1), ylim=c(0,1), 
     main="ROC Curve for 'Interest in Politics'",
	 xlab="Proportion of 1's correctly classified",
	 ylab="Proportion of 0's correctly classified")
abline(a=1, b=-1, lty=2)
dev.off()

#Run for Voted in Last Election

head(tab.vote.interest.usa@fitted.values)

fitted.dvote <- tab.vote.interest.usa@fitted.values[,2] + tab.vote.interest.usa@fitted.values[,4]
head(fitted.dint)

y.dvote <- tab.vote.interest.usa@y[,2] + tab.vote.interest.usa@y[,4]
head(y.dvote)

cutoff <- c(seq(0, .1, by=.0001), seq(.1, 1, by=.005))
correct0s <- c()
correct1s <- c()
predictions <- c()
for (i in 1:length(cutoff)) {
  predictions <- as.numeric(fitted.dint > cutoff[i])
  correct0s[i] <- mean(predictions[y.dvote==0]==0)
  correct1s[i] <- mean(predictions[y.dvote==1]==1)
}
pdf("roc_dvote.pdf")
plot(correct1s, correct0s, type="l", xlim=c(0,1), ylim=c(0,1), 
     main="ROC Curve for US ISSP Data on 'Voted in Last Election'",
	 xlab="Proportion of 1's correctly classified",
	 ylab="Proportion of 0's correctly classified")
abline(a=1, b=-1, lty=2)
dev.off()

############################################################
## Load 2002 Turnout and Unemployment Risk Inequality Data
############################################################

#turnout.risk.02 <- read.csv("turnout_riskineq.csv")

#pdf(file="turnout02_riskineq.pdf")
#plot(turnout.risk.02$unempgini, turnout.risk.02$turnout02, cex=0, xlab="Unemployment Gini", ylab="2002 #General Election Voter Turnout", main="Unemployment Gini vs. Voter Turnout, 2002", cex.main=.97)
#text(turnout.risk.02$unempgini, turnout.risk.02$turnout02, labels=turnout.risk.02$stabrev, col="navy", #main="")
#dev.off()	
